# 热榜文章的时间分布：你可以对热榜文章的爬取日期进行分析，看看哪些日期的文章最多，以及是否有一些周期性的规律。
from collections import Counter
from datetime import datetime
import pandas as pd


class TimeStatistical:
    """Count hot-list articles per weekday and per hour of day.

    The timestamps are read from the ``time`` column of a CSV file.
    After ``statistical()`` runs, the raw extracted values are kept in
    ``self.weekdays`` (1 = Monday ... 7 = Sunday) and ``self.hours``
    (0-23, as returned by ``datetime.hour``), and the aggregated tables
    are kept in ``self.weekdaysDict`` and ``self.hoursDict``.
    """

    # Timestamp layouts accepted by extrac_Week_Hour, tried in order.
    _TIME_FORMATS = ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ")

    # Weekday labels in the order of the 1-based weekday numbers.
    _WEEKDAY_NAMES = (
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
        'Saturday',
        'Sunday',
    )

    def __init__(self):
        self.weekdays = []
        self.hours = []
        # Real (empty) dict instances, not the ``dict`` type itself.
        self.weekdaysDict = {}
        self.hoursDict = {}

    def statistical(self, csv_path='./csv_collect/deep_articles.csv'):
        """Build the weekday and hour frequency tables for one CSV file.

        Args:
            csv_path: Path of a CSV file that has a ``time`` column.

        Returns:
            A dict with two entries: ``"weekdaysDict"`` maps
            ``'Monday'`` ... ``'Sunday'`` to article counts, and
            ``"hoursDict"`` maps ``'1点'`` ... ``'24点'`` to article
            counts. Articles stamped in hour 0 (00:00-00:59) are counted
            under ``'24点'``.

        Raises:
            ValueError: If a non-empty timestamp matches none of the
                accepted formats.
        """
        # Load the data
        df = pd.read_csv(csv_path)

        # Start from empty lists so that calling this method again does
        # not add the new rows on top of the previous run's rows.
        self.weekdays = []
        self.hours = []

        # Clean the data and fill the weekday / hour lists
        for time_str in df["time"]:
            self.extrac_Week_Hour(time_str)

        # Count how often each weekday number / hour occurs
        counter_weekdays = Counter(self.weekdays)
        counter_hours = Counter(self.hours)

        self.weekdaysDict = {
            name: counter_weekdays[number]
            for number, name in enumerate(self._WEEKDAY_NAMES, start=1)
        }

        # datetime.hour is 0-23 while the labels run 1-24, so label 24
        # is served by hour 0 (24 % 24 == 0); other labels map to themselves.
        self.hoursDict = {
            f'{label}点': counter_hours[label % 24]
            for label in range(1, 25)
        }

        return {
            "weekdaysDict": self.weekdaysDict,
            "hoursDict": self.hoursDict,
        }

    def extrac_Week_Hour(self, time_str):
        """Append the weekday and hour of one timestamp to the lists.

        Values that carry no timestamp are skipped silently: anything
        that is not a string (pandas yields float NaN for empty CSV
        cells), the empty string, and the ``"NOTFOUND"`` placeholder.

        Args:
            time_str: Timestamp text such as
                ``"2023-05-01T13:05:09.123Z"``; the fractional seconds
                are optional.

        Raises:
            ValueError: If ``time_str`` is a non-empty string that
                matches none of the accepted formats.
        """
        if not isinstance(time_str, str):
            return
        text = time_str.strip()
        if text == "" or text == "NOTFOUND":
            return

        # Try each accepted layout; the for/else raises when none matched.
        for fmt in self._TIME_FORMATS:
            try:
                dt = datetime.strptime(text, fmt)
                break
            except ValueError:
                continue
        else:
            raise ValueError(f"unrecognized time format: {time_str!r}")

        # weekday() returns 0 (Monday) .. 6 (Sunday); store it 1-based
        self.weekdays.append(dt.weekday() + 1)
        self.hours.append(dt.hour)


# Script entry point: run the statistics once with the default CSV path.
if __name__ == "__main__":
    # Report any failure as a single message instead of a traceback.
    try:
        stats = TimeStatistical()
        stats.statistical()
    except Exception as err:
        print("错误:", err)
